## ---- Household-level estimates ----
load("est/correlates_of_rent_hh.rdata")

## Recode NAs in model_info
model_info <- model_info %>%
  mutate_at(.vars = vars(owner),
            .funs = ~ ifelse(is.na(.), "none", as.character(.)))

## Harmonize variable names
colnames(model_coefficients) <- colnames(model_coefficients) %>%
  gsub("X.", "", .) %>%
  gsub("\\._", "\\_", .)

## Recode x_names in model_coefficients (to match column names)
model_coefficients <- model_coefficients %>%
  mutate(
    x_names = x_names %>%
      gsub("\\(", "", .) %>%
      gsub("\\)", "", .) %>%
      gsub("<", "\\.", .) %>%
      gsub(">", "\\.", .) %>%
      gsub("=", "\\.", .) %>%
      gsub(" ", "\\.", .) %>%
      gsub("\\:", "\\.", .) %>%
      gsub("&", "\\.", .) %>%
      gsub("-", "\\.", .) %>%
      gsub("\\`", "", .)
  )

## Split model_coefficients by model_id, drop empty columns
model_coefficients <- model_coefficients %>%
  group_by(model_id) %>%
  group_split %>%
  lapply(select_if, all_na)

## Stack by imputations
model_est <- model_coefficients %>%
  lapply(., function(mod) {
    varying <- mod %>%
      dplyr::select(contains("_imp")) %>%
      names() %>%
      split(
        .,
        mod %>%
          dplyr::select(contains("_imp")) %>%
          names() %>%
          sapply(function(name)
            substr(name, 1, nchar(name) - 5)) %>%
          factor(
            .,
            levels = mod %>%
              dplyr::select(contains("_imp")) %>%
              names() %>%
              sapply(function(name)
                substr(name, 1, nchar(name) - 5)) %>%
              unique()
          )
      ) %>%
      lapply(function(names)
        sapply(names, function(name)
          which(names(mod) == name)))
    mod <- mod %>%
      as.data.frame() %>%
      reshape(
        direction = "long",
        varying = varying,
        v.names = names(varying),
        timevar = "imp",
        times = 1:5
      ) %>%
      dplyr::select(-id)
    
    return(mod)
  })

## Split by model_id and imp
model_est <- model_est %>%
  lapply(., function(mod) {
    mod %>%
      group_by(imp) %>%
      group_split()
  })

## Extract quantities of interest
main_effects <- list()
for (mid in seq_along(model_est)) {
  ## Model ID
  model_id <- unique(model_est[[mid]][[1]]$model_id)
  
  ## Initialization
  main_effects[[as.character(model_id)]] <- list()
  
  ## Tables
  main_effects[[as.character(model_id)]]$info <- model_info %>%
    filter(model_id == !!model_id) %>%
    mutate(
      converged = model_info %>%
        filter(model_id == !!model_id) %>%
        dplyr::select(starts_with("convergence")) %>%
        rowSums() == 0,
      sigma_res = model_info %>%
        filter(model_id == !!model_id) %>%
        dplyr::select(starts_with("sigma_res")) %>%
        rowMeans(),
      sigma_hh = model_info %>%
        filter(model_id == !!model_id) %>%
        dplyr::select(starts_with("sigma_id")) %>%
        rowMeans(),
      sigma_plz = model_info %>%
        filter(model_id == !!model_id) %>%
        dplyr::select(starts_with("sigma_plz")) %>%
        rowMeans(),
      sigma_kkz = model_info %>%
        filter(model_id == !!model_id) %>%
        dplyr::select(starts_with("sigma_kkz")) %>%
        rowMeans(),
    )  %>%
    dplyr::select(
      model_id,
      converged,
      outcome,
      predictor,
      owner,
      year_fe,
      N_obs,
      N_hh,
      N_plz,
      N_kkz,
      sigma_res,
      sigma_hh,
      sigma_plz,
      sigma_kkz
    )
  
  ## Main predictor
  pred_cwu <- paste0(main_effects[[as.character(model_id)]]$info$predictor, "_cwu")
  pred_umn <- paste0(main_effects[[as.character(model_id)]]$info$predictor, "_umn")
  
  ## Quantities of interest
  ## Positions
  x_names <- as.character(model_est[[mid]][[1]]$x_names)
  pos_within  <- which(x_names == pred_cwu)
  pos_between <- which(x_names == pred_umn)
  
  
  ## Within and between effects
  main_effects[[as.character(model_id)]]$effects <-
    model_est[[mid]] %>%
    lapply(function (imp) {
      b <- imp$b
      vcov <- as.matrix(imp[x_names])
      sim <- mvrnorm(1000L, b, vcov)
      out <- data.frame(within  = sim[, pos_within], between = sim[, pos_between])
      return(out)
    }) %>%
    bind_rows() %>%
    apply(2, quantile, c(.5, .025, .975))
}

save(
  main_effects,
  model_info,
  file = "est/correlates_of_rent_hh_pp.rdata"
)

## ---- Neighborhood-level estimates -----
load("est/correlates_of_rent_nbh.rdata")

## Harmonize variable names
colnames(model_coefficients) <- colnames(model_coefficients) %>%
  gsub("X.", "", .) %>%
  gsub("\\._", "\\_", .) %>%
  gsub("\\.", "", .)

## Recode x_names in model_coefficients (to match column names)
model_coefficients <- model_coefficients %>%
  mutate(
    x_names = x_names %>%
      gsub("\\(", "", .) %>%
      gsub("\\)", "", .) %>%
      gsub("<", "\\.", .) %>%
      gsub(">", "\\.", .) %>%
      gsub("=", "\\.", .) %>%
      gsub(" ", "\\.", .) %>%
      gsub("\\:", "\\.", .) %>%
      gsub("&", "\\.", .) %>%
      gsub("-", "\\.", .) %>%
      gsub("\\`", "", .)
  )

## Split model_coefficients by model_id, drop empty columns
model_est <- model_coefficients %>%
  group_by(model_id) %>%
  group_split %>%
  lapply(select_if, all_na)

## Extract quantities of interest
main_effects <- list()
for (mid in seq_along(model_est)) {
  ## Model ID
  model_id <- unique(model_est[[mid]]$model_id)
  
  ## Initialization
  main_effects[[as.character(model_id)]] <- list()
  
  ## Tables
  main_effects[[as.character(model_id)]]$info <- model_info %>%
    filter(model_id == !!model_id) %>%
    mutate(
      converged = model_info %>%
        filter(model_id == !!model_id) %>%
        dplyr::select(starts_with("convergence")) %>%
        rowSums() == 0,
      sigma_pl8 = model_info %>%
        filter(model_id == !!model_id) %>%
        dplyr::select(starts_with("sigma_res")) %>%
        rowMeans(),
      sigma_plz = model_info %>%
        filter(model_id == !!model_id) %>%
        dplyr::select(starts_with("sigma_plz")) %>%
        rowMeans(),
      sigma_kkz = model_info %>%
        filter(model_id == !!model_id) %>%
        dplyr::select(starts_with("sigma_kkz")) %>%
        rowMeans(),
    )  %>%
    dplyr::select(
      model_id,
      converged,
      outcome,
      predictor,
      year_fe,
      N_pl8,
      N_plz,
      N_kkz,
      sigma_pl8,
      sigma_plz,
      sigma_kkz
    )
  
  ## Main predictor
  pred_cwu <- paste0(main_effects[[as.character(model_id)]]$info$predictor,
                     "_cwu")
  pred_umn <- paste0(main_effects[[as.character(model_id)]]$info$predictor,
                     "_umn")
  
  ## Quantities of interest
  ## Positions
  x_names <- as.character(model_est[[mid]]$x_names)
  pos_within  <- which(x_names == pred_cwu)
  pos_between <- which(x_names == pred_umn)
    
    
    ## Within and between effects
    main_effects[[as.character(model_id)]]$effects <-
      model_est[[mid]] %>%
      (function (imp) {
        b <- imp$b
        vcov <- as.matrix(imp[x_names])
        sim <- mvrnorm(1000L, b, vcov)
        out <- data.frame(within  = sim[, pos_within],
                          between = sim[, pos_between])
        return(out)
      }) %>%
      apply(2, quantile, c(.5, .025, .975))
}

save(
  main_effects,
  model_info,
  file = "est/correlates_of_rent_nbh_pp.rdata"
)

## ---- Output ----
load("est/correlates_of_rent_hh_pp.rdata")
cor_hh <- main_effects
info_hh <- lapply(cor_hh, function(x) x$info) %>% 
  bind_rows() %>%
  filter(predictor == "rent")
load("est/correlates_of_rent_nbh_pp.rdata")
cor_nbh <- main_effects 
info_nbh <- lapply(cor_nbh, function(x) x$info) %>% 
  bind_rows() %>%
  filter(predictor == "rent")

## Household-level
models_hh <- c(1, 3, 4, 2)
level <- rep("Household", length(models_hh))
model_names <- c(
  "Rent/sqm (R)",
  "Asset wealth (R)",
  "Asset wealth (O)",
  "Net wealth from residence (O)"
)
fx <- cor_hh[models_hh] %>%
  sapply(function(d)
    as.vector(d$effects[, 1])) %>%
  t() %>%
  round(2) %>% 
  format(nsmall = 2) %>%
  trimws() %>%
  apply(1, function(row) paste0(row[1], " ", "(", row[2], ", ", row[3], ")"))
info_hh <- info_hh %>%
  filter(model_id %in% models_hh)

table_hh <- cbind(
  model_names,
  level,
  fx,
  info_hh$N_obs,
  info_hh$N_plz
)

## Household-level
models_nbh <- 1:2
level <- rep("Neighborhood", length(models_nbh))
model_names <- c(
  "Social Status",
  "Purchasing Power"
)
fx <- cor_nbh[models_nbh] %>%
  sapply(function(d)
    as.vector(d$effects[, 1])) %>%
  t() %>%
  round(2) %>% 
  format(nsmall = 2) %>%
  trimws() %>%
  apply(1, function(row) paste0(row[1], " ", "(", row[2], ", ", row[3], ")"))
info_nbh <- info_nbh %>%
  filter(model_id %in% models_nbh)
info_nbh
table_nbh <- cbind(
  model_names,
  level,
  fx,
  info_nbh$N_pl8,
  info_nbh$N_plz
)

## Print table
colnames(table_hh) <- c("Outcome", "Level", "Effect", "$N_{Obs}$", "$N_{PLZ}$")
colnames(table_nbh) <- c("Outcome", "Level", "Effect", "$N_{Obs}$", "$N_{PLZ}$")

tab <- rbind(
  table_hh,
  table_nbh
)

print(
  xtable(tab,
         label = "tab:cor",
         caption = "Twoway within-estimates of local rents on various outcomes.",
         align = c("l", "l", "l", "r", "r", "r")),
  sanitize.text.function = identity,
  include.rownames = FALSE,
  file = "tex/correlates.tex"
)
